Purpose: Quantify the suitability of existing modeling techniques for predicting streamflow in headwater systems.

Approach:

14.1 Data

Site information

Code
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)

Little g’s

Code
dat_clean <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/LittleG_data_clean.csv")

Big G’s

Code
dat_clean_big <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/BigG_data_clean.csv")

Climate

Code
climdf <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/Daymet_climate.csv")
climdf_summ <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/Daymet_climate_summary.csv")

Water availability

Code
wateravail <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/BigG_wateravailability_annual.csv")

14.2 Stream Stats

Write out point shape files for each state to feed into Stream Stats batch processor

Code
siteinfo_sp_wy <- siteinfo_sp %>% filter(region == "Snake")
st_write(siteinfo_sp_wy, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_wy.shp")

siteinfo_sp_mt <- siteinfo_sp %>% filter(region %in% c("Flat", "Shields"))
st_write(siteinfo_sp_mt, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_mt.shp")

siteinfo_sp_ma <- siteinfo_sp %>% filter(region == "Mass")
st_write(siteinfo_sp_ma, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_ma.shp")

siteinfo_sp_va <- siteinfo_sp %>% filter(region %in% c("Shen"))
st_write(siteinfo_sp_va, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_va.shp")

siteinfo_sp_or <- siteinfo_sp %>% filter(region %in% c("Oreg"))
st_write(siteinfo_sp_or, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_or.shp")

List geodatabase layer names

Code
st_layers(dsn = "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb")
Driver: OpenFileGDB 
Available layers:
            layer_name geometry_type features fields crs_name
1 GlobalWatershedPoint         Point       39      8   WGS 84
2      GlobalWatershed Multi Polygon       39     28   WGS 84
3      CHARACTERISTICS            NA     1921     11     <NA>
4            FLOWSTATS            NA     6763     16     <NA>

Read watershed boundaries

Code
sheds_montana <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb", layer = "GlobalWatershed")
Reading layer `GlobalWatershed' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_mt7617\points_mt7617.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 39 features and 28 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -114.8904 ymin: 43.9457 xmax: -109.7226 ymax: 49.46148
Geodetic CRS:  WGS 84
Code
sheds_massach <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_ma7625/points_ma7625.gdb", layer = "GlobalWatershed")
Reading layer `GlobalWatershed' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_ma7625\points_ma7625.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 13 features and 25 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -72.82306 ymin: 42.4123 xmax: -72.62871 ymax: 42.54973
Geodetic CRS:  WGS 84
Code
sheds_oregon <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_or7626/points_or7626.gdb", layer = "GlobalWatershed")
Reading layer `GlobalWatershed' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_or7626\points_or7626.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 7 features and 41 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -118.9295 ymin: 42.48917 xmax: -118.561 ymax: 42.79204
Geodetic CRS:  WGS 84
Code
sheds_virginia <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_va7627/points_va7627.gdb", layer = "GlobalWatershed")
Reading layer `GlobalWatershed' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_va7627\points_va7627.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 32 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -79.24034 ymin: 37.88165 xmax: -78.02949 ymax: 38.7622
Geodetic CRS:  WGS 84
Code
sheds_wyoming <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_wy7628/points_wy7628.gdb", layer = "GlobalWatershed")
Reading layer `GlobalWatershed' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_wy7628\points_wy7628.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 14 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -110.5241 ymin: 43.64373 xmax: -110.1598 ymax: 43.87029
Geodetic CRS:  WGS 84
Code
sheds <- bind_rows(sheds_massach, sheds_montana, sheds_oregon, sheds_virginia, sheds_wyoming)
mapview(sheds) 

Find sites that were delineated incorrectly

Code
options(scipen=999)
badsites <- tibble(sheds) %>% select(Name, Shape_Area, DRNAREA, ELEV) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name, area_sqmi, elev_ft)) %>% select(site_id, site_name, DRNAREA, area_sqmi) %>% mutate(percerror = (DRNAREA - area_sqmi) / area_sqmi) %>% filter(percerror >= 0.15 | percerror <= -0.15)
badsites
# A tibble: 15 × 5
   site_id site_name                      DRNAREA area_sqmi percerror
   <chr>   <chr>                            <dbl>     <dbl>     <dbl>
 1 WW      West Whately Brook           0.0399        0.493    -0.919
 2 WL      West Brook Lower             0.086         8.51     -0.990
 3 JB      Jimmy Brook                  7.87          0.974     7.08 
 4 SH08    Shields River ab Dugout     11.1           8.68      0.279
 5 SH06    Lodgepole Creek              2.2           1.36      0.619
 6 SH05    Dugout Creek                11.1           2.39      3.64 
 7 BIG_002 LangfordCreekLower           0.1           3.99     -0.975
 8 RAP     Rapidan River NWIS           0.0000386   115        -1.00 
 9 PI_09FL Piney River 09               0.36          4.28     -0.916
10 LEI     Leidy Creek Mouth NWIS       0.000811      5.17     -1.00 
11 PCM     Pacific Creek at Moran NWIS  0.34        166.       -0.998
12 SP10    SF Spread Creek Upper        0.000348     35.1      -1.00 
13 SP09    SF Spread Creek Lower       72            44.3       0.627
14 SP08    Rock Creek                   0.0000772     4.74     -1.00 
15 SP03    Leidy Creek Lower            0.00112       5.17     -1.00 

Stream stats site information

Code
streamstats_info <- tibble(sheds) %>% select(Name, DRNAREA) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name))
# streamstats_info

Read flow statistics from geodatabases

Code
montana <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb", layer = "FLOWSTATS")
Reading layer `FLOWSTATS' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_mt7617\points_mt7617.gdb' 
  using driver `OpenFileGDB'
Code
massach <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_ma7625/points_ma7625.gdb", layer = "FLOWSTATS")
Reading layer `FLOWSTATS' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_ma7625\points_ma7625.gdb' 
  using driver `OpenFileGDB'
Code
oregon <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_or7626/points_or7626.gdb", layer = "FLOWSTATS")
Reading layer `FLOWSTATS' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_or7626\points_or7626.gdb' 
  using driver `OpenFileGDB'
Code
virginia <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_va7627/points_va7627.gdb", layer = "FLOWSTATS")
Reading layer `FLOWSTATS' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_va7627\points_va7627.gdb' 
  using driver `OpenFileGDB'
Code
wyoming <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_wy7628/points_wy7628.gdb", layer = "FLOWSTATS")
Reading layer `FLOWSTATS' from data source 
  `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_wy7628\points_wy7628.gdb' 
  using driver `OpenFileGDB'
Code
streamstats <- bind_rows(montana, massach, oregon, virginia, wyoming) %>% filter(!Name %in% c(badsites$site_id)) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name)) %>% left_join(streamstats_info)
head(streamstats)
  site_id RegionID                             RegionName AreaPercent AreaSqMi
1     NFF   GC1906                  Crippen_Bue_Region_13          61 947.3549
2     NFF   GC1828                        USA_Bieger_2015          60 937.1283
3     NFF   GC1828                        USA_Bieger_2015          60 937.1283
4     NFF   GC1828                        USA_Bieger_2015          60 937.1283
5     NFF   GC1818 Northern_Rocky_Mountains_P_Bieger_2015          60 937.1283
6     NFF   GC1818 Northern_Rocky_Mountains_P_Bieger_2015          60 937.1283
   StatLabel                                StatName    Value
1 PKMAX_CB_R      Maximum Flood Crippen Bue Regional 2.46e+05
2 XABNKF_U_B Bieger_USA_channel_cross_sectional_area 9.05e+02
3 DBANKF_U_B                Bieger_USA_channel_depth 5.77e+00
4 WBANKF_U_B                Bieger_USA_channel_width 1.65e+02
5 XABNKF_P_B   Bieger_P_channel_cross_sectional_area 8.52e+02
6 DBANKF_P_B                  Bieger_P_channel_depth 5.53e+00
                  Units Years PIl PIu SE SEp PC CitationID
1 cubic feet per second    NA  NA  NA NA  NA NA        186
2           square feet     0  NA  NA NA  NA NA        160
3                  feet     0  NA  NA NA  NA NA        160
4                  feet     0  NA  NA NA  NA NA        160
5           square feet     0  NA  NA NA  NA NA        160
6                  feet     0  NA  NA NA  NA NA        160
                       site_name DRNAREA
1 North Fork Flathead River NWIS  1556.2
2 North Fork Flathead River NWIS  1556.2
3 North Fork Flathead River NWIS  1556.2
4 North Fork Flathead River NWIS  1556.2
5 North Fork Flathead River NWIS  1556.2
6 North Fork Flathead River NWIS  1556.2

View provided flow statistics for each state. What is relevant to this paper?

  • Montana: annual & monthly duration (80, 50, 20), monthly mean flow, 7 day 10 year low flow
  • Massachusetts: annual duration (99, 98, 95, 90, 85, 80, 75, 70, 60, 50), 7 day 10 and 2 year low flow
  • Oregon: annual and monthly duration (95, 50, 25, 10, 5), monthly and annual 7 day 10 and 2 year low flow
  • Virginia: lots of flow flow stats including 7 day 10 and 2 year low flow
  • Wyoming: nothing relevant
Code
print("MONTANA")
unique(montana$StatName)
print("MASSACHUSETTS")
unique(massach$StatName)
print("OREGON")
unique(oregon$StatName)
print("VIRGINA")
unique(virginia$StatName)
print("WYOMING")
unique(wyoming$StatName)

Availability of flow statistics varies greatly by state. What statistics do all states have in common (minus Wyoming)? Only 7 day 10 year low flow is relevant.

Code
Reduce(intersect, list(unique(montana$StatName), unique(massach$StatName), unique(oregon$StatName), unique(virginia$StatName)))
 [1] "Maximum Flood Crippen Bue Regional"     
 [2] "Bieger_USA_channel_cross_sectional_area"
 [3] "Bieger_USA_channel_depth"               
 [4] "Bieger_USA_channel_width"               
 [5] "Bieger_P_channel_cross_sectional_area"  
 [6] "Bieger_P_channel_depth"                 
 [7] "Bieger_P_channel_width"                 
 [8] "Bieger_D_channel_cross_sectional_area"  
 [9] "Bieger_D_channel_depth"                 
[10] "Bieger_D_channel_width"                 
[11] "7 Day 10 Year Low Flow"                 

14.2.1 West Brook exceedance

For the West Brook, plot (annual) observed and StreamStats exceedance/duration curves and calculate absolute error

Code
# set up
vars <- unique(massach$StatName)[grep("Duration", unique(massach$StatName))][-1]
sites <- c("Avery Brook", "Sanderson Brook", "Mitchell Brook", "Obear Brook Lower", "West Brook NWIS", "West Brook Upper")
preds <- list()
exceed <- list()
joined <- list()
joined_full <- list()

# calcualate 
for (i in 1:length(sites)) {
  obs <- dat_clean %>% filter(site_name == sites[i])
  # stream stats duration
  p <- streamstats %>% 
    filter(site_name == unique(obs$site_name), StatName %in% vars) %>% 
    mutate(exceedance = parse_number(StatName)) %>% 
    mutate(flow_cms = Value*0.02831683199881, area_sqkm = DRNAREA*2.58999)
  p <- add_daily_yield(data = p %>% select(site_id, site_name, DRNAREA, area_sqkm, StatName, exceedance, flow_cms), values = flow_cms, basin_area = as.numeric(unique(p$area_sqkm)))
  p <- p %>% mutate(logYield = log(Yield_mm))
  preds[[i]] <- p
  # calculate exceedance probability by site
  exceeddat <- obs %>% 
    filter(!is.na(logYield)) %>%
    arrange(desc(logYield)) %>%
    mutate(exceedance = 100/length(logYield)*1:length(logYield))
  exceed[[i]] <- exceeddat
  # join observed and streamstats exceedance, calculate error
  j <- exceeddat %>% 
    select(site_name, exceedance, logYield) %>%
    mutate(exceedance = round(exceedance, digits = 0)) %>%
    group_by(site_name, exceedance) %>%
    summarize(logYield = mean(logYield)) %>%
    ungroup() %>%
    left_join(p %>%
                select(site_name, exceedance, logYield) %>%
                rename(logYield_ss = logYield)) %>%
    mutate(error_abs = logYield_ss - logYield,
           error_abs_real = exp(logYield_ss) - exp(logYield),
           error_rel = (exp(logYield_ss) - exp(logYield)) / exp(logYield))
  joined[[i]] <- j %>% filter(!is.na(error_abs))
  joined_full[[i]] <- j
}
preds <- do.call(rbind, preds) 
exceed <- do.call(rbind, exceed)
joined <- do.call(rbind, joined)
joined_full <- do.call(rbind, joined_full)
joined_mean <- joined %>% group_by(exceedance) %>% summarize(error_abs = mean(error_abs, na.rm = TRUE))

# preds <- preds %>% mutate(site_name = factor(site_name, levels = sites))
# exceed <- exceed %>% mutate(site_name = factor(site_name, levels = sites))
# joined <- joined %>% mutate(site_name = factor(site_name, levels = sites))

# calculate among size variation for StreamStats and observed exceedance 
vardat_ss <- joined %>% group_by(exceedance) %>% summarize(exdsd = sd(logYield_ss))
vardat_obs <- joined_full %>% group_by(exceedance) %>% summarize(exdsd = sd(logYield))

# tibble for site labels
siteslabs <- tibble(site_name = sites)
Code
### Colored by site
# exceedance curves
p1 <- ggplot() +
  geom_line(data = exceed, aes(x = exceedance, y = logYield, color = site_name), size = 1) +
  geom_line(data = preds, aes(x = exceedance, y = logYield), color = "black") + 
  geom_point(data = preds, aes(x = exceedance, y = logYield), color = "black") +
  geom_text(data = siteslabs, aes(x = Inf, y = Inf, label = site_name), vjust = 1.5, hjust = 1.05, size = 3) +
  facet_wrap(~site_name, nrow = 3) +
  xlab("Exceedance probability") + ylab("log(Yield, mm)") +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     panel.grid = element_blank(), strip.text.x = element_blank(), strip.background = element_blank(),
                     legend.position = "none")
# absolute error
p2 <- ggplot(data = joined) +
  geom_line(aes(x = exceedance, y = error_abs, group = site_name, color = site_name)) +
  geom_point(aes(x = exceedance, y = error_abs, group = site_name, color = site_name)) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  geom_line(data = joined_mean, aes(x = exceedance, y = error_abs), size = 1) +
  xlab("Exceedance probability") + ylab("Absolute error") + ylim(-2.1,0) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))

Code
### No Color
# exceedance curves
p1 <- ggplot() +
  geom_line(data = exceed, aes(x = exceedance, y = logYield), size = 1) +
  geom_line(data = preds, aes(x = exceedance, y = logYield), color = "red") + 
  geom_point(data = preds, aes(x = exceedance, y = logYield), color = "red") +
  geom_text(data = siteslabs, aes(x = Inf, y = Inf, label = site_name), vjust = 1.5, hjust = 1.05, size = 3) +
  facet_wrap(~site_name, nrow = 3) +
  xlab("Exceedance probability") + ylab("log(Yield, mm)") +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     panel.grid = element_blank(), strip.text.x = element_blank(), strip.background = element_blank(),
                     legend.position = "none")
# absolute error
p2 <- ggplot(data = joined) +
  geom_line(aes(x = exceedance, y = error_abs, group = site_name)) +
  geom_point(aes(x = exceedance, y = error_abs, group = site_name)) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  geom_line(data = joined_mean, aes(x = exceedance, y = error_abs), size = 1, col = "red") +
  xlab("Exceedance probability") + ylab("Absolute error") + ylim(-2.1,0) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))

Does StreamStats misrepresent among-site variation in flow duration?

Code
p1 <- ggplot() +
  geom_line(data = vardat_obs, aes(x = exceedance, y = exdsd), size = 1, color = "grey60") +
  geom_line(data = vardat_ss, aes(x = exceedance, y = exdsd), color = "black") + 
  geom_point(data = vardat_ss, aes(x = exceedance, y = exdsd), color = "black") +
  xlab("Exceedance probability") + ylab("Among-site standard deviation in log(Yield, mm)") +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

p2 <- vardat_obs %>% 
  left_join(vardat_ss %>% rename(exdsd_ss = exdsd)) %>% 
  mutate(diff = exdsd_ss - exdsd) %>%
  filter(!is.na(diff)) %>%
  ggplot() +
  geom_line(aes(x = exceedance, y = diff)) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  xlab("Exceedance probability") + ylab("Difference") +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 

annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))

14.2.2 Flathead mean monthly yield

For the Flathead (Big Creek), plot observed and StreamStats mean monthly flow/yield and calculate absolute error

Code
# set up
vars <- unique(montana$StatName)[grep("Mean Flow", unique(montana$StatName))]
sites <- c("Hallowat Creek NWIS", "HallowattCreekLower", "WernerCreek", "LangfordCreekUpper", "Big Creek NWIS", "McGeeCreekLower")
preds_list <- list()
obs_list <- list()
hull_list <- list()
join_list <- list()

# calcualate 
for (i in 1:length(sites)) {
  # filter observed data
  obs <- dat_clean %>% 
  filter(site_name == sites[i]) %>%
    mutate(MonthName = factor(MonthName, levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")))
  # calculate monthly means
  obs_mon <- obs %>%
    group_by(site_name, WaterYear, MonthName) %>%
    summarize(logYield = mean(logYield)) %>%
    ungroup() %>%
    mutate(MonthName = factor(MonthName, levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")),
           WaterYear = factor(WaterYear)) %>% 
    complete(site_name, WaterYear, MonthName)
  # get monthly min/max for ribbon
  hull <- obs_mon %>% 
    group_by(site_name, MonthName) %>% 
    summarize(min_logYield = min(logYield, na.rm = TRUE), max_logYield = max(logYield, na.rm = TRUE)) %>% 
    ungroup()
  # get StreamStats mean monthly flow
  preds <- streamstats %>% 
    filter(site_name == sites[i], StatName %in% vars, !is.na(AreaSqMi)) %>% 
    mutate(MonthName = substr(StatName, 1, nchar(StatName)-10),
           Month = parse_number(StatLabel)) %>% 
    mutate(MonthName = factor(recode(MonthName, "January" = "Jan", "February" = "Feb", "March" = "Mar", "April" = "Apr", "June" = "Jun", "July" = "Jul", "August" = "Aug", "September" = "Sep", "October" = "Oct", "November" = "Nov", "December" = "Dec"), levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep"))) %>%
    mutate(flow_cms = Value*0.02831683199881, area_sqkm = AreaSqMi*2.58999)
  preds <- add_daily_yield(data = preds %>% select(site_id, site_name, area_sqkm, MonthName, Month, flow_cms), values = flow_cms, basin_area = as.numeric(unique(preds$area_sqkm)))
  preds <- preds %>% mutate(logYield = log(Yield_mm))
  # join and calculate error
  join_list[[i]] <- obs_mon %>%
    left_join(preds %>% select(site_name, MonthName, logYield) %>% rename(logYield_ss = logYield)) %>%
    mutate(error_abs = logYield_ss - logYield,
           error_abs_real = exp(logYield_ss) - exp(logYield),
           error_rel = (exp(logYield_ss) - exp(logYield)) / exp(logYield))
  # store in list
  preds_list[[i]] <- preds
  obs_list[[i]] <- obs_mon
  hull_list[[i]] <- hull
}
preds <- do.call(rbind, preds_list) 
obs_mon <- do.call(rbind, obs_list)
hull <- do.call(rbind, hull_list)
joined <- do.call(rbind, join_list)
joined_mean <- joined %>% group_by(MonthName) %>% summarise(error_abs = mean(error_abs, na.rm = TRUE))

# tibble for site labels
siteslabs <- tibble(site_name = sites, site_lab = c("Hallowatt Upper", "Hallowatt Lower", "Werner", "Langford Upper", "Big NWIS", "McGee Lower"))

# calculate among site variation for StreamStats and observed mean monthly flow (mean across years) 
vardat <- preds %>% 
  group_by(MonthName) %>% 
  summarize(qsd_ss = sd(logYield)) %>%
  ungroup() %>%
  left_join(obs_mon %>% 
  group_by(site_name, MonthName) %>% 
  summarize(logYield = mean(logYield, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(MonthName) %>%
  summarize(qsd_obs = sd(logYield)) %>%
  ungroup()) %>%
  mutate(diff = qsd_ss - qsd_obs, nummon = as.numeric(MonthName))
Code
### Colored by site
# observed and StreamStats monthly flow
p1 <- ggplot() +
  geom_ribbon(data = hull, aes(ymin = min_logYield, ymax = max_logYield, x = as.numeric(MonthName), fill = site_name), alpha = 0.3) +
  geom_line(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, color = site_name)) +
  geom_point(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, shape = WaterYear, color = site_name)) +
  geom_line(data = preds, aes(y = logYield, x = as.numeric(MonthName), group = site_name), color = "black") +
  geom_point(data = preds, aes(y = logYield, x = as.numeric(MonthName)), color = "black") +
  geom_text(data = siteslabs, aes(x = -Inf, y = Inf, label = site_lab), vjust = 1.5, hjust = -0.05, size = 3) +
  facet_wrap(~site_name, ncol = 2) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none", 
                     strip.text.x = element_blank(), strip.background = element_blank()) +  
  ylab("Monthly mean log(Yield)") + xlab("Month") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S"))
# absolute error by month
p2 <- ggplot() +
  geom_line(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear, color = site_name)) +
  geom_point(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear, color = site_name)) +
  geom_line(data = joined_mean, aes(x = as.numeric(MonthName), y = error_abs), size = 1) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  xlab("Month") + ylab("Absolute error") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))

Code
### No color
# observed and StreamStats monthly flow
p1 <- ggplot() +
  geom_ribbon(data = hull, aes(ymin = min_logYield, ymax = max_logYield, x = as.numeric(MonthName)), fill = "grey80") +
  geom_line(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear)) +
  geom_point(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, shape = WaterYear)) +
  geom_line(data = preds, aes(y = logYield, x = as.numeric(MonthName), group = site_name), color = "red") +
  geom_point(data = preds, aes(y = logYield, x = as.numeric(MonthName)), color = "red") +
  geom_text(data = siteslabs, aes(x = -Inf, y = Inf, label = site_lab), vjust = 1.5, hjust = -0.05, size = 3) +
  facet_wrap(~site_name, ncol = 2) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none", 
                     strip.text.x = element_blank(), strip.background = element_blank()) +  
  ylab("Monthly mean log(Yield)") + xlab("Month") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S"))
# absolute error by month
p2 <- ggplot() +
  geom_line(data = joined, aes(x = as.numeric(MonthName), y = error_abs, group = interaction(site_name, WaterYear))) +
  geom_point(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear)) +
  geom_line(data = joined_mean, aes(x = as.numeric(MonthName), y = error_abs), size = 1, col = "red") +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  xlab("Month") + ylab("Absolute error") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))

Does StreamStats misrepresent among-site variation in mean monthly yield?

Code
p1 <- vardat %>%
  ggplot() +
  geom_line(aes(x = nummon, y = qsd_obs), size = 1, color = "grey60") +
  geom_line(aes(x = nummon, y = qsd_ss), color = "black") + 
  geom_point(aes(x = nummon, y = qsd_ss), color = "black") +
  xlab("Month") + ylab("Among-site standard deviation in log(Yield, mm)") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

p2 <- vardat %>%
  ggplot() +
  geom_line(aes(x = nummon, y = diff)) + 
  xlab("Month") + ylab("Difference") +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))

14.3 PRMS

14.4 UM Climatology

14.4.1 Get predictions

Pull streamflow predictions from Montana Climate Office’s (University of Montana) Streamflow API. The finest spatial resolution of streamflow predictions is HUC-10. This means that all sites within subbasins have the same predicted streamflow. This makes we wonder how relevant/useful these comparisons are, beyond g/G comparisons shown for objective 1.

Get HUC-10 watershed codes

Code
myhucs <- c()
for (i in 1:dim(siteinfo_sp)[1]) { myhucs[i] <- get_huc(AOI = siteinfo_sp[i,], type = "huc10")$huc10 }
siteinfo <- siteinfo %>% mutate(huc10 = myhucs)
myhucs <- unique(myhucs)
#length(myhucs)

Query UM Climatology

Code
request = httr::GET(
  # can replace this with /predictions/raw, the only query parameter that isn't shared is aggregations.
  "https://data.climate.umt.edu/streamflow-api/predictions/",
  query = list(
    locations = paste(myhucs, collapse = ","),
    date_start = "2015-01-01",
    date_end = "2025-01-01",
    aggregations = "mean", 
    as_csv = TRUE,
    units = "mm"
  )
)
umpreds <- httr::content(request)
umpreds <- umpreds %>% rename(huc10 = location)
print(umpreds)
write_csv(umpreds, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/UM_Climatology_PredictedQ.shp")

Load UM Climatrology predicted flow data

Code
umpreds <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/UM_Climatology_PredictedQ.shp")
umpreds
# A tibble: 58,464 × 5
   huc10      version  date       metric value
   <chr>      <chr>    <date>     <chr>  <dbl>
 1 0108020106 vPUB2025 2015-01-01 mean    2.14
 2 0108020106 vPUB2025 2015-01-02 mean    1.86
 3 0108020106 vPUB2025 2015-01-03 mean    1.93
 4 0108020106 vPUB2025 2015-01-04 mean    4.00
 5 0108020106 vPUB2025 2015-01-05 mean    4.04
 6 0108020106 vPUB2025 2015-01-06 mean    3.07
 7 0108020106 vPUB2025 2015-01-07 mean    2.43
 8 0108020106 vPUB2025 2015-01-08 mean    2.16
 9 0108020106 vPUB2025 2015-01-09 mean    2.03
10 0108020106 vPUB2025 2015-01-10 mean    1.90
# ℹ 58,454 more rows

Join to observed data

Code
dat_umpred <- dat_clean %>%
  left_join(siteinfo %>% select(site_name, huc10)) %>%
  left_join(umpreds %>% select(huc10, date, value)) 

14.4.2 View predictions

14.4.2.1 All HUCs

Plot all time series data

Code
umpreds %>% 
  ggplot(aes(x = date, y = value)) + 
  geom_line() +
  facet_wrap(~huc10, scales = "free_y")

14.4.2.2 Compare gG

Compare predicted (red) and observed (black) streamflow data for select sites/basins. For some sites, modeled flow captures the general patterns/shapes of observed hydrographs well, but accuracy is reduced at fine temporal resolutions. For other sites, modeled flow fails to capture both the general and finer resolution asepcts of observed data.

Code
dat_umpred %>% filter(site_name == "Jimmy Brook") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") 
Code
dat_umpred %>% filter(site_name == "Staunton River 06") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector()  %>% dyAxis("y", label = "Yield (mm)") 
Code
dat_umpred %>% filter(site_name == "Hallowat Creek NWIS") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector()  %>% dyAxis("y", label = "Yield (mm)") 
Code
dat_umpred %>% filter(site_name == "SF Spread Creek Lower") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector()  %>% dyAxis("y", label = "Yield (mm)") 
Code
dat_umpred %>% filter(site_name == "Dugout Creek") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector()  %>% dyAxis("y", label = "Yield (mm)") 
Code
dat_umpred %>% filter(site_name == "Donner Blitzen ab Indian NWIS") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector()  %>% dyAxis("y", label = "Yield (mm)") 

14.4.3 Efficiency

NSE scores and categories per Moriasi et al. “Model evaluation guidelines for systematic quantification of accuracy in watershed simulations.” Transactions of the ASABE 50.3 (2007): 885-900.

Category NSE range
Very good 0.75-1.00
Good 0.65-0.75
Satisfactory 0.50-0.65
Unsatisfactory <0.50

Note that there is no effort to ensure consistant data availability among sites/basins/years. Would it be better to restrict these?

14.4.3.1 Overall

NSE by subbasin for all available data.

Code
dat_umpred %>% 
  group_by(site_name, subbasin) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
  ggplot(aes(x = subbasin, y = nse)) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_boxplot(fill = "grey", outlier.shape = NA) +
  geom_jitter(height = 0, width = 0.2, shape = 1) +
  geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
  xlab("Sub-basin") + ylab("Nash-Sutcliffe efficiency") + ggtitle("All data") +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.4)) +
  scale_y_continuous(limits = c(-8.5,1), expand = c(0,0))

NSE by subbasin, summer (July-August) only, all available years.

Code
dat_umpred %>% 
  filter(Month %in% c(7:9)) %>%
  group_by(site_name, subbasin) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
  ggplot(aes(x = subbasin, y = nse)) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_boxplot(fill = "grey", outlier.shape = NA) +
  geom_jitter(height = 0, width = 0.2, shape = 1) +
  geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
  xlab("Sub-basin") + ylab("Nash-Sutcliffe efficiency") + ggtitle("July-September") +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.4)) +
  scale_y_continuous(limits = c(-12.5,1), expand = c(0,0))

14.4.3.2 By time

How does NSE vary through time?

NSE by subbasin and month, pooled across all available years

Code
dat_umpred %>% 
  group_by(site_name, subbasin, MonthName) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen")),
         MonthName = factor(MonthName, levels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))) %>%
  ggplot(aes(x = MonthName, y = nse)) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_point(aes(group = site_name)) +
  geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
  scale_x_discrete(labels = c("Jan" = "J", "Feb" = "F", "Mar" = "M", "Apr" = "A", "May" = "M", "Jun" = "J", "Jul" = "J", "Aug" = "A", "Sep" = "S", "Oct" = "O", "Nov" = "N", "Dec" = "D")) +
  xlab("Month") + ylab("Nash-Sutcliffe efficiency") + ggtitle("All data") +
  facet_wrap(~subbasin, scales = "free_y") +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  scale_y_continuous(expand = c(0,0))

For the West Brook, show NSE by time and site, no pooling.

Code
dat_umpred %>% 
  mutate(yearmonth = floor_date(date, "month")) %>% 
  filter(subbasin == "West Brook") %>%
  group_by(site_name, subbasin, yearmonth) %>% 
  summarize(nobs = n(), nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  filter(nobs >= 25) %>%
  ggplot(aes(x = yearmonth, y = nse)) + 
  # annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  # annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  # annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  # annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_point(aes(color = site_name)) +
  geom_line(aes(color = site_name)) +
  geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
  xlab("Time") + ylab("Nash-Sutcliffe efficiency") + ggtitle("West Brook (truncated y-axis limits)") +
  #facet_wrap(~subbasin, scales = "free") +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ylim(-50,1)

14.4.3.3 Timescale

Often, ecological studyies use flow data aggregated at coarser temporal resolutions. How does efficiency change with the time scale of aggregation? Do flow predictions become more accurate when aggregated from daily to weekly or monthly means?

Aggregate data (or not), calculate NSE for each site, and combine.

Code
daily <- dat_umpred %>% 
  group_by(site_name, subbasin) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(timescale = "day")

weekly <- dat_umpred %>% 
  mutate(date = floor_date(date, "week")) %>% 
  group_by(site_name, subbasin, date) %>% 
  summarise(nobs = n(), Yield_mm = mean(Yield_mm), value = mean(value)) %>%
  ungroup() %>%
  filter(nobs == 7) %>%
  group_by(site_name, subbasin) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(timescale = "week")

monthly <- dat_umpred %>% 
  mutate(date = floor_date(date, "month")) %>% 
  group_by(site_name, subbasin, date) %>% 
  summarise(nobs = n(), Yield_mm = mean(Yield_mm), value = mean(value)) %>%
  ungroup() %>%
  filter(nobs >= 27) %>%
  group_by(site_name, subbasin) %>% 
  summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
  ungroup() %>%
  mutate(timescale = "month")

timescale <- bind_rows(daily, weekly, monthly) %>%
  mutate(timescale = factor(timescale, levels = c("day", "week", "month"))) 

Plot all sites

Code
timescale %>%
  ggplot(aes(x = timescale, y = nse, group = site_name)) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_point() +
  geom_line() +
  xlab("Time scale") + ylab("Nash-Sutcliffe efficiency") + 
  theme_bw() + ylim(-6,1) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  scale_y_continuous(limits = c(-14,1), expand = c(0,0))

Plot all sites, facet by subbasin

Code
timescale %>%
  mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
  ggplot(aes(x = timescale, y = nse, group = site_name)) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
  geom_point() +
  geom_line() +
  xlab("Time scale") + ylab("Nash-Sutcliffe efficiency") + 
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~subbasin, scales = "free_y")

Generally, aggregating daily flow predictions to weekly or monthly means does not substantially change the accuracy of predicted data. However, there is a slight trend for accuracy (NSE) to increase with temporal aggregation for sites at which daily predictions are already fairly accurate. In contrast, for sites for which daily predictions are not accurate, temporal aggregation further decreases accuracy.